# knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(modelsummary)
library(tidyverse)

ABSTRACT

This project is an analysis of The Myanmar coup of 2021. The main focus is on the economic and social characteristics of the area (township level) where the people were arrested and died in the coup. We select the characteristic variables through exploratory descriptive statistics and perform statistics by means of hypothesis testing and regression analysis. We draw the conclusion that the number of detainees is significantly related to the employment status of their area, the food poverty index, the literacy rate, and the total number of conflicts in the past period. As a result of the above analysis, we believe that it is feasible to improve the local economic situation as a starting point in order to stabilize the situation in Burma.

INTRODUCTION

In early February 2021, there was an army mutiny in Burma. This mutiny had a huge impact, with many citizens and opposition forces being detained, imprisoned and even killed. We hope that we can draw some useful conclusions by exploring the characteristics of these affected individuals themselves and the areas where they were located.

In our study the main focus of: Dependent Variable: Detainees per 1000
Independent Variable: Economy (Poverty Ratio, Food Poverty Ratio, Poverty Gap); Sex; Employment (Employee Number); Education
(Female, Male, Total).

We hope that through our analysis, we can give the Burmese authorities or international organizations concerned about the situation in Burma some insights related to ameliorating the chaos and promoting stability in Burma.

METHODS

Data Preprocessing

Data loading and cleaning

Get the main dataset of detainees, imprisoned, fallen. Clean the dataset as needed.

# Import starting data (AAPP)
detainees = read_csv("https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/detained.csv.gz")
imprisoned = read_csv("https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/imprisoned.csv.gz")
fallen = read_csv("https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/fallen.csv")

# edit the township values in the detainees dataset
detainees <- detainees %>%
  mutate(Township = str_replace_all(Address, ".*,", "")) %>%
  mutate(Township = str_replace_all(Township, "T?own.*", ""))

# delete the null/non-functional values of age in the fallen dataset for future use
fallen <- fallen %>%
  # convert Age to the numeric type
  mutate(Age = as.numeric(Age)) %>%
  # delete the rows with null Age
  filter(!is.na(.$Age)) %>%
  # delete the rows with abnormal `States/Regions`
  filter(nchar(.$`States/Regions`) < 15) %>%
  mutate(`States/Regions` = str_replace_all(`States/Regions`, " ", ""))

Read the conflicts (ACLED) data.

conflicts = read_csv("https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/conflicts.csv.gz")

Read the sector indicators (MIMU) data.

# sectors (MIMU)
tmp = tempfile(fileext = ".xlsm")
httr::GET(
  url = "https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/MIMU_BaselineData_AllSectors_Countrywide_18Mar2021_revised.xlsm", 
  httr::write_disk(tmp)
)
## Response [https://www.andrew.cmu.edu/user/jweiss2/21f_r/94842/final_2021/MIMU_BaselineData_AllSectors_Countrywide_18Mar2021_revised.xlsm]
##   Date: 2021-12-10 04:38
##   Status: 200
##   Content-Type: application/vnd.ms-excel.sheet.macroEnabled.12
##   Size: 19.3 MB
## <ON DISK>  /var/folders/s8/65bc3d5x649_gpdcbjfqgzbw0000gn/T//RtmpV3f5R7/file185b2e9aa157.xlsm
# Township level of MIMU data
sector.indicators = 
  readxl::read_xlsx(tmp, sheet=3, skip = 5) %>% as_tibble()

# Organize all sectors data to be given by nested tibbles per indicator
sector.nest = sector.indicators %>% 
  select(1:3, 
         Indicator_Name, Indicator_Type, Sector, Unit, 
         starts_with("20"), Source_Name) %>%
  mutate(Indicator = paste(Indicator_Name, Indicator_Type, 
                           Sector, Unit, Source_Name, sep="|")) %>%
  select(1:3, Indicator, starts_with("20")) %>%
  pivot_longer(cols = starts_with("20"), 
               names_to = "Year", 
               values_to="Value") %>%
  filter(!is.na(Value)) %>%
  nest(data = -Indicator) %>%
  separate(Indicator, sep="\\|", 
           into = c("Indicator_Name", "Indicator_Type",
                    "Sector","Unit","Source_Name")) 

# State_Region level of MIMU data
sector.indicators.state.level = 
  readxl::read_xlsx(tmp, sheet=2, skip = 5) %>% as_tibble()

# Organize all sectors data to be given by nested tibbles per indicator
sector.state.level.nest = sector.indicators.state.level %>% 
  select(1:3, 
         Indicator_Name, Indicator_Type, Sector, Unit, 
         starts_with("20"), Source_Name) %>%
  mutate(Indicator = paste(Indicator_Name, Indicator_Type, 
                           Sector, Unit, Source_Name, sep="|")) %>%
  select(1:3, Indicator, starts_with("20")) %>%
  pivot_longer(cols = starts_with("20"), 
               names_to = "Year", 
               values_to="Value") %>%
  filter(!is.na(Value)) %>%
  nest(data = -Indicator) %>%
  separate(Indicator, sep="\\|", 
           into = c("Indicator_Name", "Indicator_Type",
                    "Sector","Unit","Source_Name")) 
# We can use the Levenshtein distance to find approximate matches at the township level.

#' level computes the levenshtein distance between x and each y and returns
#' the ones that are within k of the smallest value as an ordered vector. 
#' See ?adist for details.
#' @return data.frame of hits
leven = function(x, y, k=0, ignore.case=T) {
  data.frame(y=y) %>%
    as_tibble() %>%
    
    # compute Levenshtein distance for string x for each y
    mutate(distance = utils::adist(x, y, 
                                   ignore.case=ignore.case) %>% .[1,]) %>%
    
    # keep y's within k of the best match
    filter(distance <= min(distance, na.rm=T) + k) %>%
    mutate(distance.per.char = distance/nchar(y))
}

# Use `leven` for string *vectors* `x` and `y`
apply_leven = function(x, y, k=0, distance.threshold=0.3, ignore.case=F) {
  data.frame(x=x) %>%
    # get potential matches for each x as a list of tibbles
    mutate(leven.df = map(x, ~ leven(.x, y=y,
                                     k=k, ignore.case=ignore.case))
    ) %>%
    unnest(everything()) %>%
    mutate(is.match=distance.per.char < distance.threshold) %>%
    
    # order by best match
    arrange(distance.per.char) %>%
    
    # keep the best match per `x`
    group_by(x) %>% 
    slice(1) %>%  
    ungroup() %>%
    
    # convert non-matches to Other
    mutate(y = ifelse(is.match, y, "Other"))  
}

Data merging

Merge data and calculate the number of detainees per 1000

township.sizes = sector.nest %>% 
  filter(Indicator_Name=="Population size", 
         Indicator_Type=="Total", 
         str_detect(Source_Name, "Census")) %>%
  unnest(everything())

# y (regression)
detainees.per.1000 <- detainees %>% 
  # part (c): nesting
  nest(data=-Township) %>%
  # part (d): merging with the detainee_township, MIMU_township mapper
  inner_join(apply_leven(.$Township,
                         township.sizes$Township_Name,
                         distance.threshold=0.28),
             by=c("Township"="x")
  ) %>%
  rename(detainee_township=Township, MIMU_township=y) %>%
  # part (e): summarise at MIMU_township level
  group_by(MIMU_township) %>%
  summarise(detainees = sum(map_dbl(data,nrow))) %>%
  ungroup() %>%
  # part (f): attach MIMU indicator and compute outcome "detainees.per.1000"
  left_join(township.sizes, by=c("MIMU_township"="Township_Name")) %>%
  mutate(detainees.per.1000 = detainees/Value) %>%
  arrange(desc(detainees)) %>%
  select(MIMU_township, detainees, detainees.per.1000)

In the conflicts (ACLED) dataset, total number of conflicts per township

conflicts.num.by.township <- conflicts %>%
  group_by(admin3) %>%
  count() %>%
  mutate(conflicts.num = n) %>%
  select(admin3, conflicts.num) %>%
  rename(Township_Name = admin3)

conflicts.num.by.township %>% head(5)
## # A tibble: 5 × 2
## # Groups:   Township_Name [5]
##   Township_Name  conflicts.num
##   <chr>                  <int>
## 1 Ahlone                    22
## 2 Amarapura                 58
## 3 Ann                       12
## 4 Aunglan                   20
## 5 Aungmyaythazan           109

Data preprocessing on MIMU

In the township level, extract the total employee number per town

sector.employee <- sector.indicators %>%
  filter(Indicator_Type=="Employer - Total")
sector.employee %>% head(5)
## # A tibble: 5 × 30
##   State_Region Township_Name Township_Pcode Sector  Sub_Sector  Indicator_Name  
##   <chr>        <chr>         <chr>          <chr>   <chr>       <chr>           
## 1 Ayeyarwady   Bogale        MMR017024      Economy Usual Acti… Population (10 …
## 2 Ayeyarwady   Danubyu       MMR017022      Economy Usual Acti… Population (10 …
## 3 Ayeyarwady   Dedaye        MMR017026      Economy Usual Acti… Population (10 …
## 4 Ayeyarwady   Einme         MMR017015      Economy Usual Acti… Population (10 …
## 5 Ayeyarwady   Hinthada      MMR017008      Economy Usual Acti… Population (10 …
## # … with 24 more variables: Indicator_Type <chr>, Unit <chr>, 2009-2010 <dbl>,
## #   2010 <dbl>, 2010-2011 <dbl>, 2011 <dbl>, 2011-2012 <dbl>, 2012 <dbl>,
## #   2012-2013 <dbl>, 2013 <dbl>, 2013-2014 <dbl>, 2014 <dbl>, 2014-2015 <dbl>,
## #   2015 <dbl>, 2015-2016 <dbl>, 2016 <dbl>, 2016-2017 <dbl>, 2017 <dbl>,
## #   2017-2018 <dbl>, 2018 <dbl>, 2018-2019 <dbl>, 2019 <dbl>, 2019-2020 <dbl>,
## #   Source_Name <chr>
nrow(sector.employee[is.na(sector.employee$`2014`),])
## [1] 0

For the employment data, there is no missing on the column of “2014”, so we use the data on employee of 2014 for analysis use.

In the township level, extract the education and sex data

literacy.df = subset(sector.indicators,Indicator_Name == "Adult literacy rate" & (Indicator_Type == "Total" | Indicator_Type == "Female" | Indicator_Type == "Male")) %>%
  select(State_Region,Township_Name,Indicator_Type,"2014")


literacy.df.fem <- subset(literacy.df,Indicator_Type == "Female")
literacy.df.male <- subset(literacy.df,Indicator_Type == "Male")
literacy.df.total <- subset(literacy.df,Indicator_Type == "Total")

liter.sex.merged <- merge(literacy.df.fem,literacy.df.male,by=c("Township_Name","State_Region"))
literacy.merged <- merge(liter.sex.merged,literacy.df.total,by=c("Township_Name","State_Region"))
literacy.merged <- literacy.merged %>%
  rename(fem.literacy = "2014.x", male.literacy = "2014.y", total.literacy = "2014") %>%
  select(State_Region,Township_Name,fem.literacy,male.literacy,total.literacy)

literacy.merged %>% head(5)
##   State_Region  Township_Name fem.literacy male.literacy total.literacy
## 1       Yangon         Ahlone         97.9          99.2           98.5
## 2     Mandalay      Amarapura         93.8          97.8           95.6
## 3      Rakhine            Ann         71.1          88.0           79.0
## 4       Magway        Aunglan         90.5          96.9           93.4
## 5     Mandalay Aungmyaythazan         95.8          98.8           97.1
sex.ratio.df = subset(sector.indicators,Indicator_Name == "Sex ratio" & Source_Name == "MMR_MOIP/DOP, The 2014 Myanmar Population and Housing Census") %>%
  select(State_Region,Township_Name,Indicator_Type,"2014")
sex.ratio.df %>% head(5)
## # A tibble: 5 × 4
##   State_Region Township_Name Indicator_Type `2014`
##   <chr>        <chr>         <chr>           <dbl>
## 1 Ayeyarwady   Bogale        Total            97.5
## 2 Ayeyarwady   Danubyu       Total            91.7
## 3 Ayeyarwady   Dedaye        Total            96.4
## 4 Ayeyarwady   Einme         Total            95.1
## 5 Ayeyarwady   Hinthada      Total            89.3

In the state level, extract the income inequality value: - “Poverty headcount ratio” - “Food poverty headcount index” - “Poverty gap ratio”

# "Poverty headcount ratio"
sector.income.poverty.ratio <- sector.indicators.state.level %>%
  filter(Indicator_Name == "Poverty headcount ratio" & Indicator_Type == "Total")
sector.income.poverty.ratio %>% head(5)
## # A tibble: 5 × 30
##   State_Region SR_Pcode Sector  Sub_Sector Indicator_Name   Indicator_Type Unit 
##   <chr>        <chr>    <chr>   <chr>      <chr>            <chr>          <chr>
## 1 Ayeyarwady   MMR017   Economy Income     Poverty headcou… Total          Perc…
## 2 Bago (East)  MMR007   Economy Income     Poverty headcou… Total          Perc…
## 3 Bago (West)  MMR008   Economy Income     Poverty headcou… Total          Perc…
## 4 Chin         MMR004   Economy Income     Poverty headcou… Total          Perc…
## 5 Kachin       MMR001   Economy Income     Poverty headcou… Total          Perc…
## # … with 23 more variables: 2009-2010 <dbl>, 2010 <dbl>, 2010-2011 <dbl>,
## #   2011 <dbl>, 2011-2012 <dbl>, 2012 <dbl>, 2012-2013 <dbl>, 2013 <dbl>,
## #   2013-2014 <dbl>, 2014 <dbl>, 2014-2015 <dbl>, 2015 <dbl>, 2015-2016 <dbl>,
## #   2016 <dbl>, 2016-2017 <dbl>, 2017 <dbl>, 2017-2018 <dbl>, 2018 <dbl>,
## #   2018-2019 <dbl>, 2019 <dbl>, 2019-2020 <dbl>, 2020 <dbl>, Source_Name <chr>
nrow(sector.income.poverty.ratio[is.na(sector.income.poverty.ratio$`2010`),])
## [1] 0
# "Food poverty headcount index"
sector.income.food.poverty.index <- sector.indicators.state.level %>%
  filter(Indicator_Name == "Food poverty headcount index" & Indicator_Type == "Total")
sector.income.food.poverty.index %>% head(5)
## # A tibble: 5 × 30
##   State_Region SR_Pcode Sector  Sub_Sector Indicator_Name   Indicator_Type Unit 
##   <chr>        <chr>    <chr>   <chr>      <chr>            <chr>          <chr>
## 1 Ayeyarwady   MMR017   Economy Income     Food poverty he… Total          Perc…
## 2 Bago (East)  MMR007   Economy Income     Food poverty he… Total          Perc…
## 3 Bago (West)  MMR008   Economy Income     Food poverty he… Total          Perc…
## 4 Chin         MMR004   Economy Income     Food poverty he… Total          Perc…
## 5 Kachin       MMR001   Economy Income     Food poverty he… Total          Perc…
## # … with 23 more variables: 2009-2010 <dbl>, 2010 <dbl>, 2010-2011 <dbl>,
## #   2011 <dbl>, 2011-2012 <dbl>, 2012 <dbl>, 2012-2013 <dbl>, 2013 <dbl>,
## #   2013-2014 <dbl>, 2014 <dbl>, 2014-2015 <dbl>, 2015 <dbl>, 2015-2016 <dbl>,
## #   2016 <dbl>, 2016-2017 <dbl>, 2017 <dbl>, 2017-2018 <dbl>, 2018 <dbl>,
## #   2018-2019 <dbl>, 2019 <dbl>, 2019-2020 <dbl>, 2020 <dbl>, Source_Name <chr>
nrow(sector.income.food.poverty.index[is.na(sector.income.food.poverty.index$`2010`),])
## [1] 0
# "Poverty gap ratio"
sector.income.poverty.gap.ratio <- sector.indicators.state.level %>%
  filter(Indicator_Name == "Poverty gap ratio" & Indicator_Type == "Total")
sector.income.poverty.gap.ratio %>% head(5)
## # A tibble: 5 × 30
##   State_Region SR_Pcode Sector  Sub_Sector Indicator_Name   Indicator_Type Unit 
##   <chr>        <chr>    <chr>   <chr>      <chr>            <chr>          <chr>
## 1 Ayeyarwady   MMR017   Economy Income     Poverty gap rat… Total          Perc…
## 2 Bago (East)  MMR007   Economy Income     Poverty gap rat… Total          Perc…
## 3 Bago (West)  MMR008   Economy Income     Poverty gap rat… Total          Perc…
## 4 Chin         MMR004   Economy Income     Poverty gap rat… Total          Perc…
## 5 Kachin       MMR001   Economy Income     Poverty gap rat… Total          Perc…
## # … with 23 more variables: 2009-2010 <dbl>, 2010 <dbl>, 2010-2011 <dbl>,
## #   2011 <dbl>, 2011-2012 <dbl>, 2012 <dbl>, 2012-2013 <dbl>, 2013 <dbl>,
## #   2013-2014 <dbl>, 2014 <dbl>, 2014-2015 <dbl>, 2015 <dbl>, 2015-2016 <dbl>,
## #   2016 <dbl>, 2016-2017 <dbl>, 2017 <dbl>, 2017-2018 <dbl>, 2018 <dbl>,
## #   2018-2019 <dbl>, 2019 <dbl>, 2019-2020 <dbl>, 2020 <dbl>, Source_Name <chr>
nrow(sector.income.poverty.gap.ratio[is.na(sector.income.poverty.gap.ratio$`2010`),])
## [1] 0

For the income data, there is no missing on the column of “2010”, so we use the data on income of 2010 for analysis use.

Merge all the data into 1 tibble

sector.employee %>% arrange(Township_Name)
## # A tibble: 330 × 30
##    State_Region Township_Name  Township_Pcode Sector  Sub_Sector  Indicator_Name
##    <chr>        <chr>          <chr>          <chr>   <chr>       <chr>         
##  1 Yangon       Ahlone         MMR013037      Economy Usual Acti… Population (1…
##  2 Mandalay     Amarapura      MMR010006      Economy Usual Acti… Population (1…
##  3 Rakhine      Ann            MMR012014      Economy Usual Acti… Population (1…
##  4 Magway       Aunglan        MMR009016      Economy Usual Acti… Population (1…
##  5 Mandalay     Aungmyaythazan MMR010001      Economy Usual Acti… Population (1…
##  6 Sagaing      Ayadaw         MMR005014      Economy Usual Acti… Population (1…
##  7 Bago (East)  Bago           MMR007001      Economy Usual Acti… Population (1…
##  8 Yangon       Bahan          MMR013044      Economy Usual Acti… Population (1…
##  9 Sagaing      Banmauk        MMR005023      Economy Usual Acti… Population (1…
## 10 Kayah        Bawlake        MMR002005      Economy Usual Acti… Population (1…
## # … with 320 more rows, and 24 more variables: Indicator_Type <chr>,
## #   Unit <chr>, 2009-2010 <dbl>, 2010 <dbl>, 2010-2011 <dbl>, 2011 <dbl>,
## #   2011-2012 <dbl>, 2012 <dbl>, 2012-2013 <dbl>, 2013 <dbl>, 2013-2014 <dbl>,
## #   2014 <dbl>, 2014-2015 <dbl>, 2015 <dbl>, 2015-2016 <dbl>, 2016 <dbl>,
## #   2016-2017 <dbl>, 2017 <dbl>, 2017-2018 <dbl>, 2018 <dbl>, 2018-2019 <dbl>,
## #   2019 <dbl>, 2019-2020 <dbl>, Source_Name <chr>
detainees.per.1000 %>% arrange(MIMU_township)
## # A tibble: 172 × 3
##    MIMU_township detainees detainees.per.1000
##    <chr>             <dbl>              <dbl>
##  1 Ahlone                1            0.0180 
##  2 Aunglan               1            0.00425
##  3 Ayadaw               10            0.0642 
##  4 Bago                 44            0.0895 
##  5 Bahan                17            0.176  
##  6 Bilin                 1            0.00552
##  7 Bogale                2            0.00620
##  8 Botahtaung            7            0.171  
##  9 Budalin               5            0.0405 
## 10 Chauk                 1            0.00540
## # … with 162 more rows
# merge detainees.per.1000 and sector.employee to detainees.employee
detainees.employee <- detainees.per.1000 %>%
  left_join(sector.employee, by=c("MIMU_township"="Township_Name")) %>%
  select("MIMU_township","detainees.per.1000","detainees","State_Region","2014") %>%
  filter(MIMU_township!="Other") %>%
  rename("employee.num"="2014")
# detainees.employee %>% head(5)


# merge detainees.employee and sector.income.poverty.ratio to detainees.employee.income
detainees.employee.income <- detainees.employee %>%
  left_join(sector.income.poverty.ratio, by=c("State_Region"="State_Region")) %>%
  select("MIMU_township", "State_Region", "detainees.per.1000", "detainees", "employee.num", "2010") %>%
  # na.omit() %>%
  rename("poverty.ratio"="2010")

detainees.employee.income <- detainees.employee.income %>%
  left_join(sector.income.food.poverty.index, by=c("State_Region"="State_Region")) %>%
  select("MIMU_township", "State_Region", "detainees.per.1000", "detainees", "employee.num", "poverty.ratio", "2010") %>%
  # na.omit() %>%
  rename("food.poverty.index"="2010")

detainees.employee.income <- detainees.employee.income %>%
  left_join(sector.income.poverty.gap.ratio, by=c("State_Region"="State_Region")) %>%
  select("MIMU_township", "State_Region", "detainees.per.1000", "detainees", "employee.num", "poverty.ratio", "food.poverty.index", "2010") %>%
  # delete the na data, 2 examples in the state `Nay Pyi Taw`
  na.omit() %>%
  rename("poverty.gap.ratio"="2010")

# detainees.employee.income %>% head(5)

# merge detainees.employee.income and literacy.merged to detainees.employee.income.education
detainees.employee.income.education <- detainees.employee.income %>%
  left_join(literacy.merged, by=c("MIMU_township"="Township_Name","State_Region"="State_Region")) %>%
  select("MIMU_township", "State_Region", "detainees.per.1000", "detainees", "employee.num", "poverty.ratio", "food.poverty.index", "poverty.gap.ratio", "fem.literacy", "male.literacy", "total.literacy")

# merge detainees.employee.income.education and sex.ratio.df to detainees.employee.income.education.sex
detainees.employee.income.education.sex <- detainees.employee.income.education %>%
  left_join(sex.ratio.df, by=c("MIMU_township"="Township_Name","State_Region"="State_Region")) %>%
  select("MIMU_township", "State_Region", "detainees.per.1000", "detainees", "employee.num", "poverty.ratio", "food.poverty.index", "poverty.gap.ratio", "fem.literacy", "male.literacy", "total.literacy","2014") %>%
  na.omit() %>%
  rename("sex.ratio"="2014")

# detainees.employee.income.education.sex %>% head(5)

# merge detainees.employee.income.education.sex and conflicts.num.by.township to merged.for.regression
merged.for.regression <- unique(detainees.employee.income.education.sex) %>%
  left_join(conflicts.num.by.township, by=c("MIMU_township"="Township_Name"))

merged.for.regression <- merged.for.regression %>%
  arrange(.,MIMU_township) 

merged.for.regression = merged.for.regression[-c(81,84),]

merged.for.regression
## # A tibble: 169 × 13
##    MIMU_township State_Region detainees.per.1000 detainees employee.num
##    <chr>         <chr>                     <dbl>     <dbl>        <dbl>
##  1 Ahlone        Yangon                  0.0180          1         1115
##  2 Aunglan       Magway                  0.00425         1         4481
##  3 Ayadaw        Sagaing                 0.0642         10         2021
##  4 Bago          Bago (East)             0.0895         44         9162
##  5 Bahan         Yangon                  0.176          17         2641
##  6 Bilin         Mon                     0.00552         1         3424
##  7 Bogale        Ayeyarwady              0.00620         2        10602
##  8 Botahtaung    Yangon                  0.171           7         1001
##  9 Budalin       Sagaing                 0.0405          5         2767
## 10 Chauk         Magway                  0.00540         1         3390
## # … with 159 more rows, and 8 more variables: poverty.ratio <dbl>,
## #   food.poverty.index <dbl>, poverty.gap.ratio <dbl>, fem.literacy <dbl>,
## #   male.literacy <dbl>, total.literacy <dbl>, sex.ratio <dbl>,
## #   conflicts.num <int>

Data Summarizations

data summarizations on Detainees dataset and other variables

summary(detainees)
##        No           Name               Sex                Age           
##  Min.   :   1   Length:6709        Length:6709        Length:6709       
##  1st Qu.:2066   Class :character   Class :character   Class :character  
##  Median :3771   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :3717                                                           
##  3rd Qu.:5447                                                           
##  Max.   :7194                                                           
##  NA's   :4                                                              
##  Father's Name         Status          Date of Arrest     Section of Law    
##  Length:6709        Length:6709        Length:6709        Length:6709       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Plaintiff         Current Condition    Address          Region/State      
##  Length:6709        Length:6709        Length:6709        Length:6709       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     Remark            Township        
##  Length:6709        Length:6709       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
str(detainees)
## spec_tbl_df [6,709 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ No               : num [1:6709] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Name             : chr [1:6709] "Aung San Suu Kyi" "Win Myint" "Tun Tun Hein" "Dr. Zaw Myint\rMaung" ...
##  $ Sex              : chr [1:6709] "F" "M" "M" "M" ...
##  $ Age              : chr [1:6709] NA NA NA NA ...
##  $ Father's Name    : chr [1:6709] "General Aung San" "U Tun Kyin" "U Kya Hein" "U Chit Maung" ...
##  $ Status           : chr [1:6709] "MP (Pyithu\rHluttaw, Kawthmu\rTownship, Yangon\rRegion),\rGovernment (State\rCounsellor), NLD\r(Chairperson)" "MP (Pyithu\rHluttaw, Tamwe\rTownship, Yangon\rRegion),\rGovernment\r(President), NLD\r(Vice Chairman 1)" "MP (Pyithu\rHluttaw, Naungcho\rTownship, Shan\rState), Pyithu\rHluttaw Deputy\rSpeaker, NLD\r(Central Executive" "MP (Regional\rHluttaw,\rAmarapura\rTownship,\rMandalay Region),\rGovernment (Chief\rMinister of" ...
##  $ Date of Arrest   : chr [1:6709] "1-Feb-21" "1-Feb-21" "1-Feb-21 and 10-\rFeb-21" "1-Feb-21 and 7-\rFeb-21" ...
##  $ Section of Law   : chr [1:6709] "Export and Import\rLaw S: 8 and\rNatural Disaster\rManagement law S:\r25, Penal Code S:\r505 (b),\rTelecommunic"| __truncated__ "Natural Disaster\rManagement law S:\r25, Penal Code S:\r505 (b)" NA "Penal Code S: 505\r(b), Natural\rDisaster\rManagement law S:\r25, 30, Anti-\rCorruption Law S:\r55" ...
##  $ Plaintiff        : chr [1:6709] "Superintendent\rKyi Linn of\rSpecial Branch,\rDekkhina District\rAdministrator (S:\r8 and 67),\rSuperintendent\"| __truncated__ "Superintendent\rMyint Naing,\rDekkhina District\rAdministrator" NA "Deputy Director\rNweni Khine of\rTownship\rGeneral\rAdministration\rDepartment" ...
##  $ Current Condition: chr [1:6709] "House Arrest" "House Arrest" "Detained" "Detained in\rMandalay\rPrison" ...
##  $ Address          : chr [1:6709] "Naypyitaw" "Naypyitaw" "Naypyitaw" "Mandalay" ...
##  $ Region/State     : chr [1:6709] "Naypyitaw" "Naypyitaw" "Naypyitaw" "Mandalay" ...
##  $ Remark           : chr [1:6709] "Myanmar Military Seizes Power and\rSenior NLD leaders including Daw\rAung San Suu Kyi and President U\rWin Myin"| __truncated__ "Senior NLD leaders including Daw\rAung San Suu Kyi and President U\rWin Myint were detained. The\rNLD’s chief"| __truncated__ "Senior NLD leaders including Daw\rAung San Suu Kyi and President U\rWin Myint were detained. The\rNLD’s chief"| __truncated__ "Senior NLD leaders including Daw\rAung San Suu Kyi and President U\rWin Myint were detained. The\rNLD’s chief"| __truncated__ ...
##  $ Township         : chr [1:6709] "Naypyitaw" "Naypyitaw" "Naypyitaw" "Mandalay" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   No = col_double(),
##   ..   Name = col_character(),
##   ..   Sex = col_character(),
##   ..   Age = col_character(),
##   ..   `Father's Name` = col_character(),
##   ..   Status = col_character(),
##   ..   `Date of Arrest` = col_character(),
##   ..   `Section of Law` = col_character(),
##   ..   Plaintiff = col_character(),
##   ..   `Current Condition` = col_character(),
##   ..   Address = col_character(),
##   ..   `Region/State` = col_character(),
##   ..   Remark = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
detainees.employee %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
detainees.per.1000 171 0 0.1 0.1 0.0 0.0 0.9
detainees 40 0 10.6 15.5 1.0 5.0 97.0
employee.num 170 0 4111.1 3174.0 33.0 3351.0 18922.0
summary(detainees.employee)
##  MIMU_township      detainees.per.1000   detainees     State_Region      
##  Length:173         Min.   :0.001454   Min.   : 1.00   Length:173        
##  Class :character   1st Qu.:0.013888   1st Qu.: 2.00   Class :character  
##  Mode  :character   Median :0.035709   Median : 5.00   Mode  :character  
##                     Mean   :0.065149   Mean   :10.62                     
##                     3rd Qu.:0.083757   3rd Qu.:11.00                     
##                     Max.   :0.868666   Max.   :97.00                     
##   employee.num  
##  Min.   :   33  
##  1st Qu.: 1884  
##  Median : 3351  
##  Mean   : 4111  
##  3rd Qu.: 5637  
##  Max.   :18922
str(detainees.employee)
## tibble [173 × 5] (S3: tbl_df/tbl/data.frame)
##  $ MIMU_township     : chr [1:173] "Monywa" "Myeik" "Thingangyun" "Insein" ...
##  $ detainees.per.1000: num [1:173] 0.261 0.327 0.353 0.18 0.17 ...
##  $ detainees         : num [1:173] 97 93 74 55 54 50 48 45 44 43 ...
##  $ State_Region      : chr [1:173] "Sagaing" "Tanintharyi" "Yangon" "Yangon" ...
##  $ employee.num      : num [1:173] 9411 2889 3438 3461 4132 ...

Data summarizations on the fallen and imprisoned dataset

summary(fallen)
##      No.                Name               Sex                 Age       
##  Length:895         Length:895         Length:895         Min.   : 1.00  
##  Class :character   Class :character   Class :character   1st Qu.:22.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :30.00  
##                                                           Mean   :31.81  
##                                                           3rd Qu.:39.00  
##                                                           Max.   :90.00  
##  Father's name      Date of Incident   Deceased Date      Organization      
##  Length:895         Length:895         Length:895         Length:895        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Place of Incidents Home Address         Township         States/Regions    
##  Length:895         Length:895         Length:895         Length:895        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Remarks         
##  Length:895        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
str(fallen)
## spec_tbl_df [895 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ No.               : chr [1:895] "1" "2" "3" "4" ...
##  $ Name              : chr [1:895] "Na Pwar (aka) Ko\rNyi Nyi Oo" "Mya Thwate Thwate\rKhaing" "Nay Nay Win Htet" "Thet Naing Win\r(aka) Min Min" ...
##  $ Sex               : chr [1:895] "M" "F" "M" "M" ...
##  $ Age               : num [1:895] 32 19 18 37 16 48 30 26 30 35 ...
##  $ Father's name     : chr [1:895] "U Hla Ngwe" "U Min Lwin" "Unknown" "U Maung\rSan" ...
##  $ Date of Incident  : chr [1:895] "08-Feb-21" "09-Feb-21" "15-Feb-21" "20-Feb-21" ...
##  $ Deceased Date     : chr [1:895] "8-Feb-21" "19-Feb-21" "15-Feb-21" "20-Feb-21" ...
##  $ Organization      : chr [1:895] "Civilian" "Student" "Civilian" "Civilian" ...
##  $ Place of Incidents: chr [1:895] "Mandalay" "Naypyitaw" "Myeik,\rTanintharyi\rRegion" "Kannar Road,\rMandalay City" ...
##  $ Home Address      : chr [1:895] "75 Street,\rbetween 37 and\r38 Street" "Hlaykhwintaung,\rLower\rPaunglaung\rHydro Power\rProject" "Toe Chal Ward" "Near 41 Street" ...
##  $ Township          : chr [1:895] "Maha Aung\rMyay" "Zeyathiri" "Myeik" "Maha Aung\rMyay" ...
##  $ States/Regions    : chr [1:895] "Mandalay" "Naypyitaw" "Tanintharyi" "Mandalay" ...
##  $ Remarks           : chr [1:895] "In another incident, 32 year old Ko\rNa Pwar ((aka) Ko Ko Oo), died after\ra car intentionally hit him at night\rin Mandalay." "On February 9, peaceful anti-coup\rprotests in Naypyitaw were\rsuppressed using a water cannon,\rrubber bullets"| __truncated__ "On 15 February evening, 18-year old\rMaung Nay Nay Win Htet was\rbeaten on his head to death while\rguarding a "| __truncated__ "In Mandalay, a shipyaroad raid\rturned violent on Saturday when\rsecurity forces opened fire on\rdemonstrators "| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   No. = col_character(),
##   ..   Name = col_character(),
##   ..   Sex = col_character(),
##   ..   Age = col_character(),
##   ..   `Father's name` = col_character(),
##   ..   `Date of Incident` = col_character(),
##   ..   `Deceased Date` = col_character(),
##   ..   Organization = col_character(),
##   ..   `Place of Incidents` = col_character(),
##   ..   `Home Address` = col_character(),
##   ..   Township = col_character(),
##   ..   `States/Regions` = col_character(),
##   ..   Remarks = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
fallen %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
Age 72 0 31.8 13.1 1.0 30.0 90.0
fallen %>% head(5)
## # A tibble: 5 × 13
##   No.   Name        Sex     Age `Father's name` `Date of Incide… `Deceased Date`
##   <chr> <chr>       <chr> <dbl> <chr>           <chr>            <chr>          
## 1 1     "Na Pwar (… M        32 "U Hla Ngwe"    08-Feb-21        8-Feb-21       
## 2 2     "Mya Thwat… F        19 "U Min Lwin"    09-Feb-21        19-Feb-21      
## 3 3     "Nay Nay W… M        18 "Unknown"       15-Feb-21        15-Feb-21      
## 4 4     "Thet Nain… M        37 "U Maung\rSan"  20-Feb-21        20-Feb-21      
## 5 5     "Wai Yan T… M        16 "Unknown"       20-Feb-21        20-Feb-21      
## # … with 6 more variables: Organization <chr>, Place of Incidents <chr>,
## #   Home Address <chr>, Township <chr>, States/Regions <chr>, Remarks <chr>
summary(imprisoned)
##        No             Name             Sex /Age             Age           
##  Min.   :  1.00   Length:308         Length:308         Length:308        
##  1st Qu.: 77.25   Class :character   Class :character   Class :character  
##  Median :153.50   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :153.50                                                           
##  3rd Qu.:229.75                                                           
##  Max.   :306.00                                                           
##  NA's   :2                                                                
##  Father's Name         Status          Date of Arrest     Section of Law    
##  Length:308         Length:308         Length:308         Length:308        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Plaintiff         Current Condition     Prison            Address         
##  Length:308         Length:308         Length:308         Length:308        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Region/Sta te         Remark         
##  Length:308         Length:308        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
str(imprisoned)
## spec_tbl_df [308 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ No               : num [1:308] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Name             : chr [1:308] "Dr. Aung Moe\rNyo" "Nan Khin Htwe\rMyint" "Nyi Pu" "Bo Bo Wai\rMaung" ...
##  $ Sex /Age         : chr [1:308] "M" "F" "M" "M" ...
##  $ Age              : chr [1:308] "62" NA NA NA ...
##  $ Father's Name    : chr [1:308] "U Nyo" "U Saw Hla Tun" NA NA ...
##  $ Status           : chr [1:308] "MP (Regional Hluttaw,\rPwintbyu Township, Magwe\rRegion), Government (Chief\rMinister of Magwe Region),\rNLD (Secretary)," "MP (State Hluttaw, Hpa-an\rTownship, Karen State),\rGovernment (Chief Minister\rof Karen State), NLD (Central\r"| __truncated__ "MP (State Hluttaw, Gwa\rTownship, Rakhine State),\rGovernment (Chief Minister\rof Rakhine State), NLD\r(Central"| __truncated__ "MP (State Hluttaw, No (1)\rConstituency of Kyarinn\rSeikkyee Township, Karen\rState), Government (Minister\rof "| __truncated__ ...
##  $ Date of Arrest   : chr [1:308] "1-Feb-21" "1-Feb-21 and\r8-Feb-21" "1-Feb-21 and\r10-Feb-21" "1-Feb-21" ...
##  $ Section of Law   : chr [1:308] "Penal Code S: 505\r(b),  Natural\rDisaster\rManagement law\rS: 25" "Anti-Corruption\rLaw S: 55, Penal\rCode S: 505 (b)" "Penal Code S: 505\r(b)" "Penal Code S: 505\r(b)" ...
##  $ Plaintiff        : chr [1:308] NA NA "Deputy\rTownship\rAdministrat\ror Kyaw\rThein" NA ...
##  $ Current Condition: chr [1:308] "Sentenced to 2\ryears" "Sentenced to 2\ryears and 75 Years" "Sentenced to 2\ryears  with hard\rlabour" "Sentenced to 2\ryears" ...
##  $ Prison           : chr [1:308] NA "Hpa-An Prison" NA NA ...
##  $ Address          : chr [1:308] "Magwe" "Kayin" "Rakhine" "Kayin" ...
##  $ Region/Sta te    : chr [1:308] "Magwe" "Kayin" "Kayin" "Kayin" ...
##  $ Remark           : chr [1:308] "Myanmar Military Seizes Power and\rSenior NLD leaders including Daw Aung\rSan Suu Kyi and President U Win Myint"| __truncated__ "Myanmar Military Seizes Power and\rSenior NLD leaders including Daw Aung\rSan Suu Kyi and President U Win Myint"| __truncated__ "Myanmar Military Seizes Power and\rSenior NLD leaders including Daw Aung\rSan Suu Kyi and President U Win Myint"| __truncated__ "Myanmar Military Seizes Power and\rSenior NLD leaders including Daw Aung\rSan Suu Kyi and President U Win Myint"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   No = col_double(),
##   ..   Name = col_character(),
##   ..   `Sex /Age` = col_character(),
##   ..   Age = col_character(),
##   ..   `Father's Name` = col_character(),
##   ..   Status = col_character(),
##   ..   `Date of Arrest` = col_character(),
##   ..   `Section of Law` = col_character(),
##   ..   Plaintiff = col_character(),
##   ..   `Current Condition` = col_character(),
##   ..   Prison = col_character(),
##   ..   Address = col_character(),
##   ..   `Region/Sta te` = col_character(),
##   ..   Remark = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
imprisoned %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
No 307 1 153.5 88.5 1.0 153.5 306.0
imprisoned %>% head(5)
## # A tibble: 5 × 14
##      No Name    `Sex /Age` Age   `Father's Name` Status         `Date of Arrest`
##   <dbl> <chr>   <chr>      <chr> <chr>           <chr>          <chr>           
## 1     1 "Dr. A… M          62    U Nyo           "MP (Regional… "1-Feb-21"      
## 2     2 "Nan K… F          <NA>  U Saw Hla Tun   "MP (State Hl… "1-Feb-21 and\r…
## 3     3 "Nyi P… M          <NA>  <NA>            "MP (State Hl… "1-Feb-21 and\r…
## 4     4 "Bo Bo… M          <NA>  <NA>            "MP (State Hl… "1-Feb-21"      
## 5     5 "Min K… M          <NA>  <NA>            "MP (State Hl… "1-Feb-21"      
## # … with 7 more variables: Section of Law <chr>, Plaintiff <chr>,
## #   Current Condition <chr>, Prison <chr>, Address <chr>, Region/Sta te <chr>,
## #   Remark <chr>

Further Research: table and figure visualizations

We would like to research the relationship between some sector indicators, the conflicts numbers and the number of detainees in the township level.

First, we plot the dependent variable for analysis.

Table and visualizations on dependent variable

summary(detainees.employee.income.education.sex$detainees.per.1000)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001454 0.013967 0.035709 0.065479 0.084287 0.868666
plot(detainees.employee.income.education.sex$detainees.per.1000,xlab="detainee per 1000")

plot(log(detainees.employee.income.education.sex$detainees.per.1000),xlab="detainee per 1000, adjusted by log")

The detainee per 1000 is mainly concentrated below 1, the absolute number is small, and the gap between towns is more obvious after taking the log, showing a certain trend, which needs to be dismantled by the regression analysis afterwards.

Table and visualizations on univariate

employee.num <- detainees.employee$employee.num
summary(employee.num)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      33    1884    3351    4111    5637   18922
plot(employee.num)+abline(a=10000,b=0)

## integer(0)
employ.percent <-detainees.employee %>%
  mutate(num_gt_10k = case_when(employee.num>10000~1,
                                employee.num<=10000~0))

The scatter plot shows that the majority of towns still employ less than 10,000 people and have a relatively limited level of economic development. The percent of having employee number > 10,000 is 4.05%

literacy_quantiles = merged.for.regression$total.literacy %>% quantile(seq(0,1,0.1))
merged.for.regression <- mutate(merged.for.regression,literacy_bin = cut(total.literacy, literacy_quantiles, include.lowest=T))
merged.for.regression %>%
  count(literacy_bin)
## # A tibble: 10 × 2
##    literacy_bin     n
##    <fct>        <int>
##  1 [31.7,76.4]     17
##  2 (76.4,88.9]     19
##  3 (88.9,91.1]     15
##  4 (91.1,93]       20
##  5 (93,93.7]       15
##  6 (93.7,94.5]     18
##  7 (94.5,95.6]     15
##  8 (95.6,96.9]     18
##  9 (96.9,97.8]     17
## 10 (97.8,99.2]     15
plot(merged.for.regression$total.literacy) + abline(a=90,b=0)

## integer(0)
literacy.percent <-merged.for.regression %>%
  mutate(num_gt_90 = case_when(total.literacy>90~1,
                                total.literacy<=90~0))
plot(merged.for.regression$literacy_bin) 

High literacy rate overall.The percent of having literacy rate > 90% is 75.15%

sex.ratio.detainees <- merged.for.regression %>%
  mutate(sex_cate = case_when(sex.ratio <=100 ~"More Female",
                              sex.ratio > 100 ~"More Male")) 

ratio.new <- sex.ratio.detainees %>%
  group_by(sex_cate) %>%
  summarise(mean_detainees=mean(detainees.per.1000))
ratio.new
## # A tibble: 2 × 2
##   sex_cate    mean_detainees
##   <chr>                <dbl>
## 1 More Female         0.0675
## 2 More Male           0.0549

On average, the township with more women, which is defined as sex.ratio > 100 will turns to have 0.0125821 more detainees.ratio.1000, it may comes from woman turns to support Aung San Suu Kyi more in political campaigns.`

Next, we plot to learn the relationship between an important variable employee.num and detainees.per.1000

Table and visualizations on bivariate - employment and detainees.

qplot(data = detainees.employee, x = employee.num, y = detainees.per.1000) + stat_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

The correlation between the number of employees and the number of detainees.per.1000 is low, the corr coefficient is 0.0919718, overall the more employees in the area, the lower the number of arrests per 1,000 people.

Table and visualizations on bivariate - education and detainees.

merged.for.regression %>%
  #group_by(MIMU_township) %>%
  summarise(corr = cor(total.literacy, detainees))
## # A tibble: 1 × 1
##    corr
##   <dbl>
## 1 0.181
qplot(data = merged.for.regression, x = total.literacy, y = detainees.per.1000) + stat_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

base.plot <- ggplot(sex.ratio.detainees,aes(x=sex_cate,y=detainees.per.1000))
base.plot+geom_violin()

For the more female areas, the vast majority of towns have lower numbers of arrests per 1,000 and lower concentrations overall, but also receive extreme values.

Table and visualizations on trivariate and multivariate

qplot(data = detainees.employee.income, x = poverty.ratio, y = detainees.per.1000, colour = employee.num) + stat_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

From the figure above, we found the positive correlation between the poverty ratio and the number of detainees.

literacy.cate.base <- sex.ratio.detainees %>%
  mutate(literacy.cate= case_when(total.literacy >=mean(total.literacy) ~"Higher Literacy",
                              total.literacy < mean(total.literacy) ~"Lower Literacy"))

p.dt.sex.literacy <- ggplot(data=literacy.cate.base,aes(y=detainees.per.1000,x=literacy.cate,fill=sex_cate))

sex.literacy.colors <- c("#009E73", "#999999")
p.dt.sex.literacy + geom_bar(stat = "identity", position = "dodge") +
  ylab("Detainees.per.1000") + 
  xlab("Township's literacy category") +
  guides(fill = guide_legend(title = "Sex category status")) + 
  scale_fill_manual(values=sex.literacy.colors)

base.plot <- ggplot(data=literacy.cate.base,aes(y=detainees.per.1000,x=employee.num,color=sex_cate))
base.plot+geom_point()+facet_wrap(~literacy.cate)+ stat_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

The difference in detainess.per.1000 between the high and low literacy groups is brought about by the gap between the different gender groups. It is similarly influenced by the extreme values.Therefore, in the subsequent analysis, we will use detainess per 1000 after taking logs to eliminate the effect of extreme values.

A final figure summarizes the results in the multivariate descriptive analysis, where the difference in detainee.per.1000 between literacy rates was not significant, but the detainees.per.1000 were more dispersed in the high literacy rate group. An overall negative trend was observed.

Table on fallen data: bivariate and trivariate

detainees %>%
  group_by(`Region/State`) %>%
  summarise(., n())
## # A tibble: 151 × 2
##    `Region/State`               `n()`
##    <chr>                        <int>
##  1 ,B Sahgwoegyin Town              2
##  2 ,K Wacahiningmaw To              3
##  3 ,M Paguwke Township              2
##  4 ,S Kagaalainy gTownshi           1
##  5 1M) aWgwared, Taungd             1
##  6 A Vyiellyaagrew, aTdhdayb ou     1
##  7 aAuybeiyna Trwowadndsyh ip       1
##  8 Aipyeyarwaddy                    5
##  9 ALyaebyuatrtwa aTdodwyn sh       1
## 10 aMdaynadra Tlaoywnship           2
## # … with 141 more rows

We would like to research the relation between the number of people detained per town and the number of people fallen per town.

  • First, summarize the number of detainees per town.
detained.per.town <- detainees %>%
  # delete the null value in `Township`
    filter(!is.na(.$`Region/State`)) %>%
  group_by(`Region/State`) %>%
  count() %>%
  mutate(detained.num = n) %>%
  select(`Region/State`, detained.num)

detained.per.town %>% head(5)
## # A tibble: 5 × 2
## # Groups:   Region/State [5]
##   `Region/State`         detained.num
##   <chr>                         <int>
## 1 ,B Sahgwoegyin Town               2
## 2 ,K Wacahiningmaw To               3
## 3 ,M Paguwke Township               2
## 4 ,S Kagaalainy gTownshi            1
## 5 1M) aWgwared, Taungd              1
  • Second, summarize the number of fallens per town.
fallen.per.town <- fallen %>%
  # delete the null value in `Township`
    filter(!is.na(.$`States/Regions`)) %>%
  group_by(`States/Regions`) %>%
  count() %>%
  mutate(fallen.num = n) %>%
  select(`States/Regions`, fallen.num)

fallen.per.town %>% head(5)
## # A tibble: 5 × 2
## # Groups:   States/Regions [5]
##   `States/Regions` fallen.num
##   <chr>                 <int>
## 1 Ayeyarwady               15
## 2 Bago                     50
## 3 Chin                     16
## 4 Kachin                   17
## 5 Kayah                    15

We would like to briefly analyze the detainee and the fallen dataset.

Table on fallen data: bivariate
fallen %>% 
  datasummary(data = ., Sex ~  Age * Mean)
Sex Mean
F 34.52
LGBT 37.00
M 31.61
Table on fallen data: trivariate
fallen %>% 
  datasummary(data = ., `States/Regions` ~ Sex * Age * Mean)
F
LGBT
M
States/Regions Mean Mean Mean
Ayeyarwady 27.47
Bago 38.00 31.44
Chin 33.50 31.36
Kachin 40.00 35.57
Kayah 15.00 30.77
Magway 52.20 36.23
Mandalay 26.60 37.00 31.29
Mon 19.00 28.09
Naypyitaw 19.00 38.20
Sagaing 30.50 32.35
Shan 68.33 26.77
Tanintharyi 31.00 30.69
Yangon 40.40 30.64

Statistical Test

fallen.2sex <- fallen %>%
  filter(Sex != "LGBT")

fallen.sex.t.test <- t.test(Age ~ Sex, data = fallen.2sex)
fallen.sex.t.test
## 
##  Welch Two Sample t-test
## 
## data:  Age by Sex
## t = 1.1558, df = 64.761, p-value = 0.252
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -2.118779  7.939498
## sample estimates:
## mean in group F mean in group M 
##        34.51613        31.60577
fallen.sex.t.test$p.value
## [1] 0.2519998

According to the t test above, the p value is 0.2519998, which is much larger than 0.05, so we could conclude that the age of fallen people do not have signifiant difference within male and female.

Data Summarizations on Merged Data

As aboved steps show, we finally select some matrixes from both MIMU and ACLED dataset. Now, based on the data, we want to do some summarization and visualization.

merged.for.regression %>% head(5)
## # A tibble: 5 × 14
##   MIMU_township State_Region detainees.per.1000 detainees employee.num
##   <chr>         <chr>                     <dbl>     <dbl>        <dbl>
## 1 Ahlone        Yangon                  0.0180          1         1115
## 2 Aunglan       Magway                  0.00425         1         4481
## 3 Ayadaw        Sagaing                 0.0642         10         2021
## 4 Bago          Bago (East)             0.0895         44         9162
## 5 Bahan         Yangon                  0.176          17         2641
## # … with 9 more variables: poverty.ratio <dbl>, food.poverty.index <dbl>,
## #   poverty.gap.ratio <dbl>, fem.literacy <dbl>, male.literacy <dbl>,
## #   total.literacy <dbl>, sex.ratio <dbl>, conflicts.num <int>,
## #   literacy_bin <fct>
str(merged.for.regression)
## tibble [169 × 14] (S3: tbl_df/tbl/data.frame)
##  $ MIMU_township     : chr [1:169] "Ahlone" "Aunglan" "Ayadaw" "Bago" ...
##  $ State_Region      : chr [1:169] "Yangon" "Magway" "Sagaing" "Bago (East)" ...
##  $ detainees.per.1000: num [1:169] 0.01802 0.00425 0.0642 0.08953 0.17575 ...
##  $ detainees         : num [1:169] 1 1 10 44 17 1 2 7 5 1 ...
##  $ employee.num      : num [1:169] 1115 4481 2021 9162 2641 ...
##  $ poverty.ratio     : num [1:169] 16.1 27 15.1 20.2 16.1 16.3 32.2 16.1 15.1 27 ...
##  $ food.poverty.index: num [1:169] 2.4 3.6 1.3 2.8 2.4 3.6 6.1 2.4 1.3 3.6 ...
##  $ poverty.gap.ratio : num [1:169] 0.02 0.04 0.02 0.03 0.02 0.03 0.05 0.02 0.02 0.04 ...
##  $ fem.literacy      : num [1:169] 97.9 90.5 86.7 90.3 98.1 83.5 90.4 97.6 92.1 88.6 ...
##  $ male.literacy     : num [1:169] 99.2 96.9 95.9 95.6 99.4 89.8 95.8 99.2 97.9 96.9 ...
##  $ total.literacy    : num [1:169] 98.5 93.4 90.7 92.7 98.7 86.3 93 98.3 94.5 92.1 ...
##  $ sex.ratio         : num [1:169] 85.4 90.5 83.2 92 88.9 94.4 97.5 90 80.5 80.1 ...
##  $ conflicts.num     : int [1:169] 22 20 42 97 51 36 11 17 50 37 ...
##  $ literacy_bin      : Factor w/ 10 levels "[31.7,76.4]",..: 10 5 3 4 10 2 4 10 6 4 ...
##  - attr(*, "na.action")= 'omit' Named int [1:2] 40 172
##   ..- attr(*, "names")= chr [1:2] "40" "172"
summary(merged.for.regression)
##  MIMU_township      State_Region       detainees.per.1000   detainees    
##  Length:169         Length:169         Min.   :0.001454   Min.   : 1.00  
##  Class :character   Class :character   1st Qu.:0.014046   1st Qu.: 2.00  
##  Mode  :character   Mode  :character   Median :0.035833   Median : 5.00  
##                                        Mean   :0.066076   Mean   :10.76  
##                                        3rd Qu.:0.084818   3rd Qu.:11.00  
##                                        Max.   :0.868666   Max.   :97.00  
##                                                                          
##   employee.num   poverty.ratio   food.poverty.index poverty.gap.ratio
##  Min.   :   33   Min.   :11.40   Min.   : 0.300     Min.   :0.01000  
##  1st Qu.: 1881   1st Qu.:16.10   1st Qu.: 2.400     1st Qu.:0.02000  
##  Median : 3338   Median :20.20   Median : 3.600     Median :0.03000  
##  Mean   : 4123   Mean   :25.67   Mean   : 5.149     Mean   :0.04296  
##  3rd Qu.: 5690   3rd Qu.:32.20   3rd Qu.: 6.100     3rd Qu.:0.05000  
##  Max.   :18922   Max.   :73.30   Max.   :25.000     Max.   :0.17000  
##                                                                      
##   fem.literacy   male.literacy   total.literacy   sex.ratio     
##  Min.   :27.40   Min.   :35.80   Min.   :31.7   Min.   : 74.80  
##  1st Qu.:86.50   1st Qu.:94.30   1st Qu.:90.1   1st Qu.: 88.00  
##  Median :91.50   Median :96.60   Median :93.7   Median : 91.50  
##  Mean   :87.89   Mean   :93.63   Mean   :90.5   Mean   : 92.49  
##  3rd Qu.:94.70   3rd Qu.:98.10   3rd Qu.:96.2   3rd Qu.: 95.60  
##  Max.   :99.00   Max.   :99.50   Max.   :99.2   Max.   :174.20  
##                                                                 
##  conflicts.num         literacy_bin
##  Min.   :  2.00   (91.1,93]  :20   
##  1st Qu.: 15.00   (76.4,88.9]:19   
##  Median : 36.00   (93.7,94.5]:18   
##  Mean   : 54.32   (95.6,96.9]:18   
##  3rd Qu.: 77.00   [31.7,76.4]:17   
##  Max.   :289.00   (96.9,97.8]:17   
##  NA's   :3        (Other)    :60
merged.for.regression %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
detainees.per.1000 169 0 0.1 0.1 0.0 0.0 0.9
detainees 40 0 10.8 15.7 1.0 5.0 97.0
employee.num 168 0 4122.6 3205.7 33.0 3338.0 18922.0
poverty.ratio 17 0 25.7 13.4 11.4 20.2 73.3
food.poverty.index 16 0 5.1 5.2 0.3 3.6 25.0
poverty.gap.ratio 8 0 0.0 0.0 0.0 0.0 0.2
fem.literacy 113 0 87.9 12.1 27.4 91.5 99.0
male.literacy 85 0 93.6 9.4 35.8 96.6 99.5
total.literacy 103 0 90.5 10.7 31.7 93.7 99.2
sex.ratio 124 0 92.5 10.3 74.8 91.5 174.2
conflicts.num 99 2 54.3 56.2 2.0 36.0 289.0

When we run regression, we don’t want use missing data. So all the missing data would be dropped in the final model regression process.

merged.data <- merged.for.regression %>%
  filter(!is.na(conflicts.num))
merged.data %>% head(5)
## # A tibble: 5 × 14
##   MIMU_township State_Region detainees.per.1000 detainees employee.num
##   <chr>         <chr>                     <dbl>     <dbl>        <dbl>
## 1 Ahlone        Yangon                  0.0180          1         1115
## 2 Aunglan       Magway                  0.00425         1         4481
## 3 Ayadaw        Sagaing                 0.0642         10         2021
## 4 Bago          Bago (East)             0.0895         44         9162
## 5 Bahan         Yangon                  0.176          17         2641
## # … with 9 more variables: poverty.ratio <dbl>, food.poverty.index <dbl>,
## #   poverty.gap.ratio <dbl>, fem.literacy <dbl>, male.literacy <dbl>,
## #   total.literacy <dbl>, sex.ratio <dbl>, conflicts.num <int>,
## #   literacy_bin <fct>
merged.data %>%
  datasummary_skim()
Unique (#) Missing (%) Mean SD Min Median Max
detainees.per.1000 166 0 0.1 0.1 0.0 0.0 0.9
detainees 40 0 10.9 15.8 1.0 5.0 97.0
employee.num 165 0 4125.3 3204.5 58.0 3334.0 18922.0
poverty.ratio 17 0 25.8 13.5 11.4 22.7 73.3
food.poverty.index 16 0 5.2 5.3 0.3 3.6 25.0
poverty.gap.ratio 8 0 0.0 0.0 0.0 0.0 0.2
fem.literacy 111 0 87.8 12.2 27.4 91.5 98.7
male.literacy 85 0 93.6 9.4 35.8 96.6 99.5
total.literacy 102 0 90.4 10.7 31.7 93.7 99.1
sex.ratio 123 0 92.2 9.4 74.8 91.5 174.2
conflicts.num 98 0 54.3 56.2 2.0 36.0 289.0

MODELS AND ANALYSIS

Regression Model 1: regression on all variables

Linear Regression Model:

Now we run regression of detainees.per.1000 on all other variables, and analyze the results:

options(scipen=4)
lm.all.variables <- lm(data=merged.data,
                       detainees.per.1000 ~ employee.num + poverty.ratio + 
                         food.poverty.index + poverty.gap.ratio + 
                         fem.literacy + male.literacy + 
                         sex.ratio + conflicts.num)
summary(lm.all.variables)
## 
## Call:
## lm(formula = detainees.per.1000 ~ employee.num + poverty.ratio + 
##     food.poverty.index + poverty.gap.ratio + fem.literacy + male.literacy + 
##     sex.ratio + conflicts.num, data = merged.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16139 -0.03934 -0.00950  0.02846  0.61527 
## 
## Coefficients:
##                        Estimate   Std. Error t value   Pr(>|t|)    
## (Intercept)         0.291082057  0.125205440   2.325   0.021361 *  
## employee.num       -0.000001942  0.000002195  -0.885   0.377658    
## poverty.ratio      -0.000670352  0.002783850  -0.241   0.810024    
## food.poverty.index  0.028733193  0.009188803   3.127   0.002105 ** 
## poverty.gap.ratio  -3.016161561  2.103832360  -1.434   0.153660    
## fem.literacy        0.008985560  0.001961178   4.582 0.00000935 ***
## male.literacy      -0.008778279  0.002477981  -3.543   0.000522 ***
## sex.ratio          -0.002336097  0.000740271  -3.156   0.001919 ** 
## conflicts.num       0.000520062  0.000112132   4.638 0.00000737 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07888 on 157 degrees of freedom
## Multiple R-squared:  0.3206, Adjusted R-squared:  0.286 
## F-statistic: 9.261 on 8 and 157 DF,  p-value: 2.058e-10

Before we analyze the result, we first get into the correlation of the independent variables and take the assessment for collinearity.

Correlation analysis:

var.names <- c("employee.num", "poverty.ratio", "food.poverty.index", "poverty.gap.ratio", "fem.literacy", "male.literacy", "sex.ratio","conflicts.num")
merged.data %>%
  select(all_of(var.names)) %>%
  pairs()

From the correlation figures above, we can find that there are strong correlations between poverty.ratio, food.poverty.index and poverty.gap.ratio. In addition, fem.literacy and male.literacy have strong correlations. Therefore, we should delete some variables to fine tune the model.

Regression Model 2: regression on selected variables

Linear Regression Model:

lm.all.variables.refined <- lm(data=merged.data,
                               detainees.per.1000 ~ employee.num + food.poverty.index + 
                                 sex.ratio + fem.literacy + conflicts.num)
summary(lm.all.variables.refined)
## 
## Call:
## lm(formula = detainees.per.1000 ~ employee.num + food.poverty.index + 
##     sex.ratio + fem.literacy + conflicts.num, data = merged.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12893 -0.04202 -0.01660  0.01904  0.66506 
## 
## Coefficients:
##                        Estimate   Std. Error t value Pr(>|t|)    
## (Intercept)        -0.048733272  0.098331338  -0.496 0.620855    
## employee.num       -0.000004970  0.000002139  -2.323 0.021439 *  
## food.poverty.index  0.005003700  0.001448059   3.455 0.000704 ***
## sex.ratio          -0.001278334  0.000736266  -1.736 0.084446 .  
## fem.literacy        0.002277046  0.000634194   3.590 0.000439 ***
## conflicts.num       0.000483128  0.000117541   4.110 0.000063 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08369 on 160 degrees of freedom
## Multiple R-squared:  0.2208, Adjusted R-squared:  0.1964 
## F-statistic: 9.067 on 5 and 160 DF,  p-value: 0.0000001311

Regression diagnostics analysis

After getting the basic regression model, we would like to look at the diagnostic plots and access whether it is reasonable to use this model.

plot(lm.all.variables.refined, sub.caption = "Diagnostics Analysis")

Look at the four plots.

First of all, we expect Residuals vs. Fitted to have constant variance and consider residuals and fitted values uncorrelated. However, it seems like they are not uncorrelated. As for Normal QQ plot, it looks a little right-skewed, and the residuals from the regression are not normally distributed.

In Scale-location plot plot, there is no discernible trends.

But in Residuals vs Leverage plot, it seems that there are some obvious outliers.

Based on the diagnostics analysis, we assume there is a better version for this regression model. Considering the QQ plot, we expect to do log calculation on the dependent variable and do linear regression on log(detainees.per.1000).

Regression Model 3: log calculation on dependent variable

Linear Regression Model:

lm.all.variables.refined.log1 <- lm(data=merged.data,
                               log(detainees.per.1000) ~ employee.num + food.poverty.index + 
                                 fem.literacy + conflicts.num + sex.ratio)
plot(lm.all.variables.refined.log1, sub.caption = "Diagnostics Analysis")

summary(lm.all.variables.refined.log1)
## 
## Call:
## lm(formula = log(detainees.per.1000) ~ employee.num + food.poverty.index + 
##     fem.literacy + conflicts.num + sex.ratio, data = merged.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1730 -0.6564 -0.0167  0.7525  2.5871 
## 
## Coefficients:
##                       Estimate  Std. Error t value    Pr(>|t|)    
## (Intercept)        -4.36376676  1.18293744  -3.689    0.000308 ***
## employee.num       -0.00006342  0.00002574  -2.464    0.014791 *  
## food.poverty.index  0.04331169  0.01742032   2.486    0.013936 *  
## fem.literacy        0.02491904  0.00762943   3.266    0.001334 ** 
## conflicts.num       0.00763076  0.00141403   5.396 0.000000241 ***
## sex.ratio          -0.01706271  0.00885737  -1.926    0.055829 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.007 on 160 degrees of freedom
## Multiple R-squared:  0.2471, Adjusted R-squared:  0.2236 
## F-statistic:  10.5 on 5 and 160 DF,  p-value: 9.81e-09

Looking at the p-value of sex.ratio, its p-value is 0.05583 and it is greater than 0.05. So it would be better if we dropped this column.

Regression Model 4: use selected variables to regress on log(dependent variable)

Linear Regression Model:

Now drop the variable and rerun the regression.

lm.all.variables.refined.log <- lm(data=merged.data,
                               log(detainees.per.1000) ~ employee.num +
                                 food.poverty.index + fem.literacy + conflicts.num)
summary(lm.all.variables.refined.log)
## 
## Call:
## lm(formula = log(detainees.per.1000) ~ employee.num + food.poverty.index + 
##     fem.literacy + conflicts.num, data = merged.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.06098 -0.69550  0.03575  0.74387  2.50645 
## 
## Coefficients:
##                       Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)        -6.19601109  0.70924030  -8.736 2.98e-15 ***
## employee.num       -0.00006797  0.00002584  -2.630 0.009365 ** 
## food.poverty.index  0.04163700  0.01754448   2.373 0.018813 *  
## fem.literacy        0.02847186  0.00746523   3.814 0.000195 ***
## conflicts.num       0.00717594  0.00140587   5.104 9.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.015 on 161 degrees of freedom
## Multiple R-squared:  0.2297, Adjusted R-squared:  0.2105 
## F-statistic:    12 on 4 and 161 DF,  p-value: 0.00000001468

Regression diagnostics analysis

plot(lm.all.variables.refined.log, sub.caption = "Diagnostics Analysis")

Now look at the four plots again.

Residuals vs. Fitted: It is obvious that residuals and fitted are unrelated with each other.

Normal QQ plot, The residuals from the regression are almost normally distributed. It is more normal distributed than the previous model.

Scale-location plot, there is no discernible trends.

Residuals vs Leverage , it seems that there are no apparent outliers.

Therefore, based on the diagnostics analysis, we consider it is reasonable to use this model. Our final model is to run linear regression of log(detainees.per.1000) on employee.num, food.poverty.index, fem.literacy and conflicts.num.

RESULTS

summary(lm.all.variables.refined.log)
## 
## Call:
## lm(formula = log(detainees.per.1000) ~ employee.num + food.poverty.index + 
##     fem.literacy + conflicts.num, data = merged.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.06098 -0.69550  0.03575  0.74387  2.50645 
## 
## Coefficients:
##                       Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)        -6.19601109  0.70924030  -8.736 2.98e-15 ***
## employee.num       -0.00006797  0.00002584  -2.630 0.009365 ** 
## food.poverty.index  0.04163700  0.01754448   2.373 0.018813 *  
## fem.literacy        0.02847186  0.00746523   3.814 0.000195 ***
## conflicts.num       0.00717594  0.00140587   5.104 9.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.015 on 161 degrees of freedom
## Multiple R-squared:  0.2297, Adjusted R-squared:  0.2105 
## F-statistic:    12 on 4 and 161 DF,  p-value: 0.00000001468
lm.coef <- round(summary(lm.all.variables.refined.log)$coef, 5)

From the summary report, we can see that based on the confidence leve of 95%, employee.num, food.poverty.index, fem.literacy and conflicts.num are all statistically significant predictors of log(detainees.per.1000). The overall p-value is significantly less than 0.05. The p-value of employee.num is 0.00936, the p-value of food.poverty.index is 0.01881, the p-value of fem.literacy is 0.00019, the p-value of conflicts.num is 0. They are all smaller than 0.05.

When all else being equal between two townships, a 1 employment increase in average employee number appears to be associated with a -0.00007 decrease in log(detainment rates per thousand). When all else being equal between two townships, a 1 food poverty increase in average appears to be associated with a 0.04164 increase in log(detainment rates per thousand). When all else being equal between two townships, a 1 fem.literacy increase in average appears to be associated with a 0.02847 increase in log(detainment rates per thousand). When all else being equal between two townships, a 1 conflict number increase in average appears to be associated with a 0.00718 increase in log(detainment rates per thousand).

We can write the model as: \[ log(detaineesPer1000) = \text-6.196 - \text0.00007 \times \text{employeeNum} + \text0.042 \times \text{povertyIndex} + \text0.028 \times \text{fem.literacy} + \text0.007 \times \text{conflictNum} \]

DISCUSSIONS

Limitations of the Analysis

Although we have 5 data sets, several columns have too many missing values. For example, almost detainees and imprisoned don’t have a valid age number. Therefore, we have to give up some columns that may have useful information.

In addition, the ACLED data set has only valid data that are based on 2021. However, the data from MIMU data set don’t have data on 2021. They only have data before 2020. What’s worse, it has so many missing columns that we have to use data from different years for analysis. For example, when selecting variable, we used the Employee Number, Female Literacy that are from 2014, Poverty data that are from 2010, but conflicts number data that are from 2021. This too large time span may lead to some problems.

Beside, when preprocessing data, we simply drop the missing value. It may have some other ways to make use of them like filling the missing value with average data. And we didn’t pay too much attention on outliers, but it is quite reasonable because we will do log calculation on dependent variables. The outliers would disappear in that case.

About the dependent variables, there are also some limitations. The value of Female literacy as a percentage of total female population is almost all greater than 90% and the variance is quite small. Although it is related to the detainment rates per thousand, it is possible that they don’t have cause and effect relationship.

Inferences from Results

From the model, we can see that the detainment rates per thousand is positively influenced by female literacy percentage, poverty index and conflict number. It is negatively influenced by employee number.

Therefore, the higher the employment number is, the less the detainees percentage will be. The less the female literacy percentage is, the less the detainees percentage will be. The less the poverty index is, the less the detainees percentage will be. The less the conflict numberis, the less the detainees percentage will be.

Potential Stakeholders

We can connect some background about the detainee event with our analysis. As it is said in the introduction of final project, the background is that the Burmese military staged a coup, toppling the quasi-democratic government and removing Aung San Suu Kyi, the civilian leader supported by the National League of Democracy.

The potential stakeholders can be the Burmese military, Burmese government, or some other international organizations who want to stabilize the situation and make detainee number reduce.

Contextualization and Policy

To calm down such an event, it is important that reduce the local conflicts number. It would be wiser if some policies about bans on conflicts are announced. What’s more, government can try to develop the local economy, to relief the poverty.

Conclusions

In summary, the detainment rates per thousand is positively related to poverty index, female literacy, conflict number and negatively related to employee number. The linear regression model can be shown like below. \[ log(detaineesPer1000) = \text-6.196 - \text0.00007 \times \text{employeeNum} + \text0.042 \times \text{povertyIndex} + \text0.028 \times \text{fem.literacy} + \text0.007 \times \text{conflictNum} \]

Future Directions

For further analysis, we can gain more variables from these data sets instead of just 8 variables. When dealing with missing values, if there is only a few missing, we can use some method like average value to fill them. Also, we can pay more attention to the outliers in case of bad effects on the model.